Data Tidying

tibble [1,222,433 × 7] (S3: tbl_df/tbl/data.frame)
 $ id  : chr [1:1222433] "USC00300023" "USC00300023" "USC00300023" "USC00300023" ...
 $ date: Date[1:1222433], format: "1981-01-03" "1981-01-05" ...
 $ prcp: int [1:1222433] 0 0 0 0 0 0 0 0 0 117 ...
 $ snow: int [1:1222433] 0 0 0 0 0 0 0 0 0 127 ...
 $ snwd: int [1:1222433] 0 0 0 0 0 0 0 0 0 76 ...
 $ tmax: chr [1:1222433] "-122" "-56" "-122" "-67" ...
 $ tmin: chr [1:1222433] "-206" "-178" "-306" "-289" ...
 - attr(*, "spec")=
  .. cols(
  ..   id = col_character(),
  ..   date = col_date(format = ""),
  ..   prcp = col_integer(),
  ..   snow = col_integer(),
  ..   snwd = col_integer(),
  ..   tmax = col_character(),
  ..   tmin = col_character()
  .. )
tibble [1,222,433 × 7] (S3: tbl_df/tbl/data.frame)
 $ id  : chr [1:1222433] "USC00300023" "USC00300023" "USC00300023" "USC00300023" ...
 $ date: Date[1:1222433], format: "1981-01-03" "1981-01-05" ...
 $ prcp: int [1:1222433] 0 0 0 0 0 0 0 0 0 117 ...
 $ snow: int [1:1222433] 0 0 0 0 0 0 0 0 0 127 ...
 $ snwd: int [1:1222433] 0 0 0 0 0 0 0 0 0 76 ...
 $ tmax: int [1:1222433] -122 -56 -122 -67 -50 -11 61 17 6 22 ...
 $ tmin: int [1:1222433] -206 -178 -306 -289 -106 -150 -67 -106 -50 -44 ...
 - attr(*, "spec")=
  .. cols(
  ..   id = col_character(),
  ..   date = col_date(format = ""),
  ..   prcp = col_integer(),
  ..   snow = col_integer(),
  ..   snwd = col_integer(),
  ..   tmax = col_character(),
  ..   tmin = col_character()
  .. )
tibble [39,288 × 8] (S3: tbl_df/tbl/data.frame)
 $ id  : chr [1:39288] "USC00300023" "USC00300023" "USC00300023" "USC00300023" ...
 $ date: Date[1:39288], format: "2001-01-01" "2001-01-02" ...
 $ prcp: int [1:39288] 0 0 0 3 0 3 0 0 0 0 ...
 $ snow: int [1:39288] 0 0 0 25 0 13 0 0 0 0 ...
 $ snwd: int [1:39288] 51 25 25 51 25 51 51 51 0 0 ...
 $ tmax: int [1:39288] -56 -11 -50 -11 -11 -28 -6 -6 -28 -28 ...
 $ tmin: int [1:39288] -89 -156 -139 -56 -56 -117 -94 -117 -106 -122 ...
 $ year: num [1:39288] 2001 2001 2001 2001 2001 ...
 - attr(*, "spec")=
  .. cols(
  ..   id = col_character(),
  ..   date = col_date(format = ""),
  ..   prcp = col_integer(),
  ..   snow = col_integer(),
  ..   snwd = col_integer(),
  ..   tmax = col_character(),
  ..   tmin = col_character()
  .. )

Column

Scatter Plot of Snowfall vs. Precipitation

Column

Box Plot of Snow Depth by Weather Station ID

Bar Plot of Weather Station ID Frequency

---
title: "NOAA Dashboard"
output: 
  flexdashboard::flex_dashboard:
    orientation: columns
    vertical_layout: fill
    source_code: embed
---

```{r setup, include=FALSE}
library(tidyverse)
library(p8105.datasets)
library(plotly)
library(dplyr)
library(rnoaa)
library(lubridate)
library(ggplot2)

library(flexdashboard)
```
# Data Tidying
```{r}
# Keep only observations where every measurement is present, then
# standardise the column names.
ny_noaa_dropna <- ny_noaa %>%
  drop_na(prcp, snow, snwd, tmax, tmin) %>%
  janitor::clean_names()
str(ny_noaa_dropna)

# Every column already has an appropriate type except tmax and tmin, which
# arrive as character and should be integers, so convert those two.
ny_noaa_clean <- ny_noaa_dropna %>%
  mutate(
    tmax = as.integer(tmax),
    tmin = as.integer(tmin)
  )
str(ny_noaa_clean)

# Restrict to recent years (after 2000) and to days on which both the minimum
# and the maximum temperature are below zero, to keep the dataset manageable.
ny_noaa_recent_subzero <- ny_noaa_clean %>%
  mutate(year = year(date)) %>%
  filter(year > 2000, tmin < 0, tmax < 0)
str(ny_noaa_recent_subzero)
```



Column {data-width=650}
-----------------------------------------------------------------------

### Scatter Plot of Snowfall vs. Precipitation

```{r}
# Scatter of snowfall against precipitation, coloured by snow depth.
# The hover text reports each day's temperature range. tmax and tmin are
# recorded in tenths of a degree Celsius, so divide by 10 before labelling
# the values as ºC.
ny_noaa_recent_subzero %>%
  mutate(
    text_label = str_c("Tmax ºC: ", tmax / 10, "\nTmin ºC: ", tmin / 10)
  ) %>%
  plot_ly(
    x = ~prcp, y = ~snow, type = "scatter", mode = "markers",
    color = ~snwd, text = ~text_label, alpha = 0.5
  )
```


Column {data-width=350}
-----------------------------------------------------------------------

### Box Plot of Snow Depth by Weather Station ID


```{r}
# One box per weather station showing the distribution of snow depth,
# with stations coloured on the viridis palette.
plot_ly(
  ny_noaa_recent_subzero,
  y = ~snwd,
  color = ~id,
  type = "box",
  colors = "viridis"
)
```

### Bar Plot of Weather Station ID Frequency

```{r}
# Count the retained observations per station, order the stations by that
# count so the bars are sorted, and draw one bar per station.
ny_noaa_recent_subzero %>%
  count(id) %>%
  mutate(id = fct_reorder(id, n)) %>%
  plot_ly(
    data = .,
    x = ~id,
    y = ~n,
    color = ~id,
    type = "bar",
    colors = "viridis"
  )
```